*Exploration of main variables for ESEC

*Tim Goedemé, 13/07/2020

/*

The output of this do-file is not used in the article.
For the results, see instead the research note:
Goedemé, 2019, A note on the replication of the European Socio-economic 
Classification (ESeC) in the EU Statistics on Income and Living Conditions 
(EU-SILC). INET Oxford Working Paper No. 2019-17, Oxford: Institute for New 
Economic Thinking at the Oxford Martin School, University of Oxford.


Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 
This file can be changed and re-shared for non-commercial use, as long as our original work 
is recognised and the revised work is made available under the same conditions.

When using this do-file, please cite as:
Goedemé, T., Nolan, B., Paskov, M., & Weisstanner, D. (2021). 
Occupational Social Class and Earnings Inequality in Europe: A Comparative Assessment. 
In: Social Indicators Research. DOI: https://doi.org/10.1007/s11205-021-02746-z; https://timgoedeme.com/tools/esec-in-eu-silc/


*/

*Globals
********

* Root folder of the data files; the sections below append country and year
* subfolders to it. Replace the placeholder with the actual directory.
global place1 "<<data directory>>"

* Country codes looped over in every section of this do-file
global countries "AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK"

* Variable lists (not referenced in the sections visible below)
global basvars "country year hid pid psu1 strata1 rb050"
global pvars "pl030 pl031 pl040 pl050 pl051 pl130 pl150"


*Check number of categories that are represented in the data
************************************************************

* For each variable a matrix res_<variable> is built with one row per country
* and one column per year (2004-2018). Each cell holds the number of distinct
* values reported by -tabulate-. A cell stays missing (.) when the data file
* does not exist or when the variable cannot be tabulated.

local varlist pl030 pl031 pl040 pl050 pl051 pl130 pl150
foreach v of local varlist {
	di "***`v'***"
	di "****************"
	cap mat drop res_`v'
	
	foreach ctry of global countries {
		di "`ctry'", _continue
		* tempmat collects the row of yearly values for the current country
		cap mat drop tempmat
		
		forvalues year=2004(1)2018 {
			local val=.
			* Forward slashes are used as directory separators: a backslash placed
			* directly before a macro reference prevents Stata from expanding it.
			cap confirm file "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta"
			if _rc==0 {
				use "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta", clear
				cap ta `v'
				* Only read r(r) when -tabulate- actually ran, so that no stale
				* result from an earlier command ends up in the matrix
				if _rc==0 local val = r(r)
			}
			mat def tempmat = nullmat(tempmat) , `val'
		}
		mat def res_`v' = nullmat(res_`v') \ tempmat
	}
	mat rownames res_`v' = $countries
	mat colnames res_`v' = 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
}

* List the resulting matrices one after the other
foreach v of local varlist {
	di "***`v'***"
	mat li res_`v'
}

*pl030: missing for Greece (EL) except for 2008? And for Malta (MT)?

*Check % of active age population with a value filled out for the variables of interest
****************************************************************************************

* For each variable a matrix res_<variable> is built with one row per country
* and one column per year (2004-2018). Each cell holds the share of
* observations with year minus rb080 in the range [18, 65) for which the
* variable is not equal to system missing (.). A cell stays missing when the
* data file does not exist or the share cannot be computed.

global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK
local varlist pl030 pl031 pl040 pl050 pl051 pl130 pl150
foreach v of local varlist {
	di "***`v'***"
	di "****************"
	cap mat drop res_`v'
	
	foreach ctry of global countries {
		di "`ctry'", _continue
		
		* tempmat collects the row of yearly values for the current country
		cap mat drop tempmat
		
		forvalues year=2004(1)2018 {
			local val=.
			* Forward slashes are used as directory separators: a backslash placed
			* directly before a macro reference prevents Stata from expanding it.
			cap confirm file "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta"
			
			if _rc==0 {
				use "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta", clear
				* actage: 1 if year minus rb080 is at least 18 and below 65
				gen actage = ((year-rb080)>=18 & (year-rb080)<65)
				* ok: 1 if the variable has a value; captured because the variable
				* may be absent from the file, in which case the cell stays missing
				cap gen ok = (`v'!=.)
				
				cap sum ok if year==`year' & actage==1
				if _rc==0 local val = r(mean)
				else local val=.
				
			}
			mat def tempmat = nullmat(tempmat) , `val'
		}
		mat def res_`v' = nullmat(res_`v') \ tempmat
	}
	mat rownames res_`v' = $countries
	mat colnames res_`v' = 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
}

* List the resulting matrices one after the other
local varlist pl030 pl031 pl040 pl050 pl051 pl130 pl150
foreach v of local varlist {
	di "***`v'***"
	mat li res_`v'
}

***Same, but now PL130 just for the self-employed (PL040==1 | PL040==2)

* Builds res_pl130 (rows = countries, columns = years 2004-2018) with the share
* of observations that have a value for pl130, restricted to observations with
* year minus rb080 in the range [18, 65) and pl040 equal to 1 or 2. A cell
* stays missing when the data file does not exist or the share cannot be
* computed.

global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK
local varlist pl130
foreach v of local varlist {
	di "***`v'***"
	di "****************"
	cap mat drop res_`v'
	
	foreach ctry of global countries {
		di "`ctry'", _continue
		
		* tempmat collects the row of yearly values for the current country
		cap mat drop tempmat
		
		forvalues year=2004(1)2018 {
			local val=.
			* Forward slashes are used as directory separators: a backslash placed
			* directly before a macro reference prevents Stata from expanding it.
			cap confirm file "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta"
			
			if _rc==0 {
				use "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta", clear
				
				* actage: 1 if year minus rb080 is at least 18 and below 65
				gen actage = ((year-rb080)>=18 & (year-rb080)<65)
				* ok: 1 if the variable has a value; captured because the variable
				* may be absent from the file, in which case the cell stays missing
				cap gen ok = (`v'!=.)
			
				cap sum ok if year==`year' & actage==1 & (pl040==1 | pl040==2)
				if _rc==0 local val = r(mean)
				else local val=.
			}
			
			mat def tempmat = nullmat(tempmat) , `val'
			 
		}
		mat def res_`v' = nullmat(res_`v') \ tempmat
	}
	mat rownames res_`v' = $countries
	mat colnames res_`v' = 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
}

* List the resulting matrix
foreach v of local varlist {
	di "***`v'***"
	mat li res_`v'
}

***Same, but now PL150 just for employees (PL040==3 | PL040==4)

* Builds res_pl150 (rows = countries, columns = years 2004-2018) with the share
* of observations that have a value for pl150, restricted to observations with
* year minus rb080 in the range [18, 65) and pl040 equal to 3 or 4. A cell
* stays missing when the data file does not exist or the share cannot be
* computed.

global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK
local varlist pl150
foreach v of local varlist {
	di "***`v'***"
	di "****************"
	cap mat drop res_`v'
	
	foreach ctry of global countries {
		di "`ctry'", _continue
		
		* tempmat collects the row of yearly values for the current country
		cap mat drop tempmat
		
		forvalues year=2004(1)2018 {
			local val=.
			* Forward slashes are used as directory separators: a backslash placed
			* directly before a macro reference prevents Stata from expanding it.
			cap confirm file "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta"
			
			if _rc==0 {
				use "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta", clear
				
				* actage: 1 if year minus rb080 is at least 18 and below 65
				gen actage = ((year-rb080)>=18 & (year-rb080)<65)
				* ok: 1 if the variable has a value; captured because the variable
				* may be absent from the file, in which case the cell stays missing
				cap gen ok = (`v'!=.)
			
				cap sum ok if year==`year' & actage==1 & (pl040==3 | pl040==4)
				if _rc==0 local val = r(mean)
				else local val=.
			}
			
			mat def tempmat = nullmat(tempmat) , `val'
			 
		}
		mat def res_`v' = nullmat(res_`v') \ tempmat
	}
	mat rownames res_`v' = $countries
	mat colnames res_`v' = 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
}

* List the resulting matrix
foreach v of local varlist {
	di "***`v'***"
	mat li res_`v'
}


***Same, but now check data availability for the unemployed (PL030==3 | PL031==5)

* For each variable a matrix res_<variable> is built (rows = countries,
* columns = years 2004-2018) with the share of unemployed observations with
* year minus rb080 in the range [18, 65) that have a value for the variable.
* Unemployment is taken from pl030 (value 3) before 2009 and from pl031
* (value 5) from 2009 onwards. For the countries listed in the global select
* the sample is additionally restricted to px040==2.

global countries AT BE BG CH CY CZ DE DK EE EL ES FI FR HR HU IE IS IT LT LU LV MT NL NO PL PT RO RS SE SI SK UK
global select DK FI IS NL NO SE SI

local varlist pl040 pl050 pl051 pl150
foreach v of local varlist {
	di "***`v'***"
	di "****************"
	cap mat drop res_`v'
	
	foreach ctry of global countries {
		di "`ctry'", _continue
		
		* tempmat collects the row of yearly values for the current country
		cap mat drop tempmat
		
		forvalues year=2004(1)2018 {
			local val=.
			* Forward slashes are used as directory separators: a backslash placed
			* directly before a macro reference prevents Stata from expanding it.
			cap confirm file "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta"
			
			if _rc==0 {
				use "${place1}/`ctry'/`year'/c`ctry'`year'_all.dta", clear
				
				* actage: 1 if year minus rb080 is at least 18 and below 65
				gen actage = ((year-rb080)>=18 & (year-rb080)<65)
				* ok: 1 if the variable has a value; captured because the variable
				* may be absent from the file, in which case the cell stays missing
				cap gen ok = (`v'!=.)
				
				* The variable identifying the unemployed depends on the year
				gen unemployed=0
				if `year'<2009 replace unemployed=1 if pl030==3
				if `year'>=2009 replace unemployed=1 if pl031==5
			
				* Countries in the global select get the extra px040==2 restriction
				if ustrregexm("${select}", "`ctry'") == 0 cap sum ok if year==`year' & actage==1 & unemployed==1
				else cap sum ok if year==`year' & actage==1 & unemployed==1 & px040==2
				if _rc==0 local val = r(mean)
				else local val=.
			}
			
			mat def tempmat = nullmat(tempmat) , `val'
			 
		}
		mat def res_`v' = nullmat(res_`v') \ tempmat
	}
	mat rownames res_`v' = $countries
	mat colnames res_`v' = 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
}

* List the resulting matrices one after the other
local varlist pl040 pl050 pl051 pl150
foreach v of local varlist {
	di "***`v'***"
	mat li res_`v'
}

